//go through all the models and output MAE and RMSE

clear all
set more off

**** GLOBALS *****;
global 	InputFile "C:\Dropbox\tax_approx\HSV\data\HSV_repdata.dta"
global 	OutputDir "C:\Dropbox\tax_approx\impact_calculation\"
*********************;


use ${InputFile} , clear



//levels of pre and postgov inc
gen pregovinc = max(redpregovinc - tot_deduction + 0.5*fica,0)
gen dispinc = max(reddispinc - tot_deduction + 0.5*fica + hwdpvpengain1,0)

//logs of pre and postgov inc
replace lpregovinc = log(redpregovinc - tot_deduction + 0.5*fica)
replace ldispinc = log(reddispinc - tot_deduction + 0.5*fica + hwdpvpengain1)



//get log_lambda and log_tau
reg ldispinc lpregovinc
scalar log_tau=_b[lpregovinc]
gen indi=e(sample)
keep if indi==1
predict yhatt
predict ress, r
sum ress
scalar log_lambda=exp(_b[_cons]+(r(sd)^2)/2)
di log_lambda


//get lambda and tau from 
poisson dispinc lpregovinc
scalar lambda=_b[_cons] 
scalar tau=_b[lpregovinc]


// implement example dataset
clear
set obs 48
input y
*0
*2000
5000
8000
10000
15000
30000
45000
65000
80000
90000
95000
100000
115000
125000
150000
200000
250000
300000 
500000
750000
1000000
1500000
2000000
2500000
3000000 
end

preserve

g y_net_ols=log_lambda*(y)^(log_tau) //lambda
label variable y_net_ols "Log OLS"

g y_net_ppml=exp(lambda)*y^tau
label variable y_net_ppml "PPML"

//tax schedules avg tax
g y_avgt_ols=1-log_lambda*(y)^(log_tau-1)
g y_avgt_ppml=1-exp(lambda)*(y)^(tau-1)

//tax schedules mrg tax
g y_mrgt_ols=1-log_lambda*log_tau*(y)^(log_tau-1)
g y_mrgt_ppml=1-exp(lambda)*tau*(y)^(tau-1)



export excel "${OutputDir}synth_data.xls", replace

tw line y_net_ols y , lc(black) lp(dash) || ///
line y_net_ppml y , lc(gray) lp(solid) || ///
line y y, legend(order(1 2) ring(0) pos(5) col(1)) lc(black) lp(dot)  scheme(s1mono) ytitle(" ") ///
xtitle(Gross Income) ytitle(Net Income) xlabel(,labsize(small)) ylabel(,labsize(small) nogrid) ///
xlabel(0 300000 600000 1000000 1500000 2000000 2500000 3000000, labsize(small)) ///
name(full_dist,replace)
graph export ${OutputDir}pred_fd.png , name(full_dist) replace


tw line y_avgt_ols y if y>2000, lc(black) lp(dash) || ///
line y_avgt_ppml y if y>2000 , lc(gray) lp(solid)  ///
legend(order(1 "Log OLS" 2 "PPML") ring(0) pos(5) col(1))  scheme(s1mono) ytitle(" ") ///
xlabel(0 300000 600000 1000000 1500000 2000000 2500000 3000000, labsize(small)) ///
ytitle(Avg. Tax Rate) xtitle(Gross Income)  xlabel(,labsize(small)) ylabel(,labsize(small) nogrid) name(avgt,replace)

tw line y_mrgt_ols y if y>2000, lc(black) lp(dash) || ///
line y_mrgt_ppml y if y>2000 , lc(gray) lp(solid)  ///
legend(order(1 "Log OLS" 2 "PPML") ring(0) pos(5) col(1))  scheme(s1mono) ytitle(" ") ///
xlabel(0 300000 600000 1000000 1500000 2000000 2500000 3000000, labsize(small)) ///
ytitle(Marginal Tax Rate) xtitle(Gross Income)  xlabel(,labsize(small)) ylabel(,labsize(small) nogrid) name(mrgt,replace)

graph combine avgt mrgt, graphregion(color(white)) rows(2)
graph export ${OutputDir}taxrates.png , replace

restore

//only do data under the threshold
keep if y<=300000


g y_net_ols=log_lambda*(y)^(log_tau) //lambda
label variable y_net_ols "Log OLS"

g y_net_ppml=exp(lambda)*y^tau
label variable y_net_ppml "PPML"



tw line y_net_ols y, lc(black) lp(dash) || ///
line y_net_ppml y, lc(gray) lp(solid) || ///
line y y, legend(order(1 2) ring(0) pos(5) col(1)) lc(black) lp(dot) scheme(s1mono) ytitle(" ") ///
ytitle(Net Income) xtitle(Gross Income)  xlabel(,labsize(small)) ylabel(,labsize(small) nogrid) ///
name(lower_end,replace)
graph export ${OutputDir}pred_le.png , name(lower_end) replace
















